

 ******************************************************************************
 *																			  *
 *							---	Main Tables	---								  *
 *																			  *
 ******************************************************************************
 



 *----------------------------------------------------------------------------*
 *		--- Table I Descriptive Statistics on Candidates and Companies ---	  *
 *----------------------------------------------------------------------------*
 
{
* Open the data and apply the standard drops and time restriction.
* Each of these globals holds a command that is defined outside this section.

$opendata
$dropvars
$timerestrict

* J collects Panel A: one row per statistic. Values are written to the even
* columns only: 2 = all, 4 = female==0, 6 = female==1, 8 = column 4 minus
* column 6, 10 = p-value of the group comparison.
matrix J=J(18,10,.)

* Row 1: number of candidates (one observation per sid).
* This runs before female==2 is dropped, so column 2 counts every sid.
preserve
	collapse (first) female, by(sid)
	sum female,d
	mat J[1,2] = r(N)
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum female if female==`g',d
		mat J[1,`gcol'] = r(N)
	}
	mat J[1,8] =  J[1,4] - J[1,6]
restore

* From here on, leave out individuals with unknown gender (female == 2)
keep if female!=2

* Row 2: average number of bids received per candidate, conditional on
* getting at least one bid (rows with d_sent > 0 are summed within sid).
preserve
	keep if d_sent > 0
	collapse (sum) d_sent (mean) female s_hired, by(sid)
	sum d_sent, d
	matrix J[2,2]=`: di %5.1fc r(mean)'
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum d_sent if female==`g', d
		matrix J[2,`gcol']=`: di %5.1fc r(mean)'
	}
	* The difference is taken between the already-rounded cell values
	mat J[2,8] = J[2,4]-J[2,6]
	ttest d_sent, by(female)
	mat J[2,10] = r(p)
restore

* Row 3: probability of accepting an interview request (in percent),
* restricted to s_job_position <= 5
	sum d_accept if s_job_position<=5, d
	matrix J[3,2]=`: di %5.1fc r(mean)*100'
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum d_accept if female==`g' & s_job_position<=5, d
		matrix J[3,`gcol']=`: di %5.1fc r(mean)*100'
	}
	mat J[3,8] = J[3,4]-J[3,6]
	* Same filter as above; it has no further effect at this point
	keep if female!=2
	* NOTE(review): this t-test additionally conditions on s_hired, whereas the
	* means in columns 2/4/6 do not -- confirm the intended sample.
	ttest d_accept if s_hired & s_job_position<=5, by(female)
	mat J[3,10] = r(p)


*--------------------------------------*
* Collapse to the candidate level: after sorting, keep the first
* observation of each sid (earliest batch_start_date).
sort sid batch_start_date
collapse (first) female $fcontrols $ccontrols $preferences smonthyear , by(sid)
*--------------------------------------*

* Derived indicators used for the share rows below.
* NOTE(review): Stata treats missing values as larger than any number, so a
* missing cat_degree or s_nb_reports evaluates to 1 in the >= / > comparisons
* below -- confirm these variables have no missing values at this point.
	gen bachelor = cat_degree  >= 2
	gen master = cat_degree >=3
	gen sanfran = s_choice_location  == 1
	tab s_nb_reports, mi
	* Indicator for any direct reports (s_nb_reports > 0); the table labels
	* this row "Share leading a team".
	gen report_5plus = s_nb_reports  > 0
	gen engineer = s_primary_field ==4
	gen design = s_primary_field ==3
	gen product = s_primary_field ==2

* Row 8: share looking for a full-time job.
* s_contract is recoded to 0/1 and the reported share is 1 - mean.
	replace s_contract = 0 if s_contract == 2 | s_contract == 4
	replace s_contract = 1 if s_contract == 3
	sum s_contract
	mat J[8,2] =  `: di %5.1fc (1 - r(mean)) *100'
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum s_contract if female==`g'
		mat J[8,`gcol'] = `: di %5.1fc (1 - r(mean)) *100'
	}
	mat J[8,8] = J[8,4] - J[8,6]
	ttest s_contract, by(female)
	mat J[8,10] = r(p)

* Row 11: years of total experience (mean, not a percentage)
	sum s_total_exp
	mat J[11,2] = `: di %5.1fc r(mean)'
	local a = r(sd)
	di `a'
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum s_total_exp if female==`g'
		mat J[11,`gcol'] = `: di %5.1fc r(mean)'
	}
	mat J[11,8] = J[11,4] - J[11,6]
	ttest s_total_exp, by(female)
	mat J[11,10] = r(p)

* Row 15: median number of days unemployed, among candidates with
* days_unemployed different from 0 and below 1500; groups are compared
* with a median test instead of a t-test.
preserve
	keep if  days_unemployed!= 0
	keep if   days_unemployed < 1500
	sum days_unemployed , d
	mat J[15,2] = `: di %5.0fc r(p50)'
	forvalues g = 0/1 {
		local gcol = 4 + 2*`g'
		sum days_unemployed if female==`g' , d
		mat J[15,`gcol'] = `: di %5.0fc r(p50)'
	}
	mat J[15,8] = J[15,4] - J[15,6]
	median days_unemployed, by(female)
	mat J[15,10] = r(p)
restore

* Remaining rows: shares in percent, with a t-test by gender.
* The list is in table-row order starting at row 4; a "." marks a row that
* was filled separately above (8, 11, 15) and is skipped here.
	local sharevars bachelor master csd ivyplus . sanfran s_sponsorship . ///
		faang report_5plus employed . engineer design product
	local r = 3
	foreach v of local sharevars {
		local ++r
		if "`v'" == "." {
			continue
		}
		sum `v'
		mat J[`r',2] = `: di %5.1fc r(mean)*100'
		forvalues g = 0/1 {
			local gcol = 4 + 2*`g'
			sum `v' if female==`g'
			mat J[`r',`gcol'] = `: di %5.1fc r(mean)*100'
		}
		mat J[`r',8] = J[`r',4] - J[`r',6]
		ttest `v', by(female)
		mat J[`r',10] = r(p)
	}

* Write Panel A (matrix J) as the opening fragment of table1.tex.
* The file is written under $tables, the same location every later
* -frmttable ..., append- call for this table uses, so that all panels end
* up in one file regardless of the machine the code runs on.
* ctitle lists one header per output column (row-title column first, then
* the ten columns of J), separated by commas.
frmttable using "$tables/table1.tex", tex statmat(J) varlabels  ///
fragment sdec(0 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \0,0,0,0,0,0,0,0,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3 \1,1,1,1,1,1,1,1,3,3) replace ///
ctitle("\multicolumn{11}{c}{\textit{Panel A: Descriptive Statistics on Candidates}} \\" , "", "\textbf{All}" , "", "\textbf{Male}", "", "\textbf{Female}", "", "\textbf{Difference}", "", "\textbf{p-value}") ///
rtitles("\textbf{Number of Candidates}" ///
\ "\textbf{Average number of bids received per candidate}" \ ///
"\textbf{Probability of accepting an interview request}"   ///
\ "\textbf{Education}  & &  \\ \quad Share with a bachelor" \ "\quad Share with a master" ///
\ "\quad Share with a CS degree"\ "\quad Share with an IvyPlus degree" \ ///
"\textbf{Preferences}  & &  \\ \quad Share looking for full time job" ///
\ "\quad Share looking for a job in San Francisco" \ "\quad Share in need of visa sponsorship" \ ///
"\textbf{Work History}  & &  \\ \quad Years of total experience (Mean)"  \ ///
"\quad Share that worked at a FAANG" \ "\quad Share leading a team"  \ "\quad Share employed" \ ///
"\quad Number days unemployed (Median)" \ "\textbf{Occupation}  & &  \\ \quad Share of software engineers" \ ///
"\quad Share of web designers" \ "\quad Share of product managers" ) 



* Reload the bid-level data (the candidate-level collapse above replaced it)

$opendata
$dropvars
$timerestrict


* Panel B, first row: general counts
*--------------------------------------*
* C is a single row; the counts go into the even columns 2, 4, ..., 10.
matrix C=J(1,10,.)

* Columns 2 and 4: number of companies and number of jobs.
* r(sum) is the result returned by the user-written -unique- command
* (presumably the number of distinct values -- confirm for the installed version).
local ccol = 2
foreach idvar in companyid jobid {
	unique `idvar'
	mat C[1,`ccol'] = r(sum)
	local ccol = `ccol' + 2
}

* Columns 6 and 8: number of interview requests sent (non-missing d_salary)
* and number of final offers made (non-missing df_salary)
foreach salvar in d_salary df_salary {
	count if !mi(`salvar')
	mat C[1,`ccol'] = r(N)
	local ccol = `ccol' + 2
}

* Column 10: number of cities
unique d_location if d_location != .
mat C[1,10] = r(sum)

* Append the Panel B header and counts to the table file
frmttable using "$tables/table1.tex", tex statmat(C) varlabels ///
fragment sdec(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) append ///
rtitles("\noalign{\smallskip} \hline \multicolumn{11}{c}{\textit{Panel B: Descriptive Statistics on Companies}} \\ \textbf{Number of:} && \textbf{Firms} && \textbf{Jobs} && \textbf{Bids sent} & \multicolumn{3}{c}{\textbf{Final offers}} & \textbf{Cities} \\ ")


* Descriptives of Firms
*--------------------------------------*
* T holds two rows of firm-level shares (row 1: revenue brackets, row 2:
* firm-age brackets); values go into the even columns 2, 4, ..., 10.
matrix T =J(2,10,.)

preserve
	* Collapse to one row per firm: first value of the platform_* fields and
	* of company_name_norm, plus the firm's modal job location.
	egen d_location_mode = mode(d_location), by(companyid)
	describe d_location
	collapse (first) platform* company_name_norm (firstnm) d_location_mode, by(companyid)
	label values d_location_mode loc

	* --- Revenue and age ---
	gen  firm_age = 2019-platform_founded
	tab platform_revenue
	* Merge code 3 into bracket 1 and codes 3000/7500/15000 into bracket 750
	replace platform_revenue = 1 if platform_revenue == 3
	replace platform_revenue = 750 if inlist(platform_revenue, 3000, 7500, 15000)

	* First pass: one 0/1 dummy per bracket (missing when revenue is missing)
	* and the sum of the rounded percentage shares, used below to rescale.
	local tot = 0
	foreach rev of numlist 1 15 65 300 750 {
		gen dummy_rev`rev' = (platform_revenue==`rev') if !mi(platform_revenue)
		sum dummy_rev`rev', d
		local tot = `tot' + `: di %3.0fc r(mean) * 100'
	}
	di `tot'

	* Second pass: each rounded share divided by the sum of rounded shares,
	* times 100 and rounded again; brackets fill columns 2, 4, ..., 10 in order.
	local col = 2
	foreach rev of numlist 1 15 65 300 750 {
		sum dummy_rev`rev' , d
		mat T[1,`col'] = round(`: di %3.1fc ((`: di %3.0fc r(mean) * 100') / `tot') * 100')
		local col = `col' + 2
	}

	sum platform_revenue
	local no_revenue : di %5.0fc r(N)

	* Firm-age brackets (missing when firm_age is missing)
	tab firm_age
	local col = 2
	foreach bin in "0 5" "6 10" "11 15" "16 20" {
		local lo : word 1 of `bin'
		local hi : word 2 of `bin'
		gen dummy_age_`lo'_`hi' = inrange(firm_age, `lo', `hi') if !mi(firm_age)
		sum dummy_age_`lo'_`hi', d
		mat T[2,`col'] = r(mean) *100
		local col = `col' + 2
	}
	gen dummy_age_21 = firm_age >= 21 if !mi(firm_age)
	sum dummy_age_21, d
	mat T[2,10] = r(mean) *100

	sum firm_age
	local no_age : di %5.0fc r(N)

* Append the revenue and firm-age rows to the table file
frmttable using "$tables/table1.tex", append tex statmat(T) ///
varlabels  fragment sdec( 0 \ 0)  ///
rtitles("\noalign{\smallskip} \hline \noalign{\smallskip} \textbf{Revenue (yearly, in Million USD)} && \textbf{1-25} && \textbf{26-100} && \textbf{101-500} && \textbf{501-1,000} && \textbf{1,000+} \\ Share (N = `no_revenue')" \ ///
"\noalign{\smallskip} \hline \noalign{\smallskip} \textbf{Firm Age (in years)} && \textbf{0-5} && \textbf{6-10} && \textbf{11-15} && \textbf{16-20} && \textbf{20+} \\ Share (N = `no_age')") bracket( "" "\%" \ "" "") substat(0)

	* --- Firm size, location, industry ---
	* J is reused here: row 1 = firm size, row 2 = top 3 locations,
	* row 3 = top 3 industries.
	matrix J =J(3,10,.)

	* Firms with platform_company_size between 800 and 20000 are folded into
	* the last size bracket (dummy_cs750) below.
	gen dummy_cs1000 = inrange(platform_company_size, 800, 20000) if !mi(platform_company_size)

	local col = 2
	foreach size of numlist  8  32  125 350 750  {
		gen dummy_cs`size' = (platform_company_size==`size') if !mi(platform_company_size)
		if `size' == 750 {
			replace dummy_cs`size' = 1 if dummy_cs1000 == 1
		}
		sum dummy_cs`size' , d
		mat J[1,`col'] = r(mean) *100
		local col = `col' + 2
	}

	* Top 3 locations, based on the firm's modal job location.
	* d_location itself no longer exists after the collapse above, so the
	* modal-location variable is named explicitly rather than relying on
	* variable abbreviation.
	tab d_location_mode, sort
	fre d_location_mode
	* Location codes 1/2/3 are labelled SF/NY/LA in the table. Firms whose
	* modal location is one of codes 6, 13, 16, 17, 18, 19 are set to missing
	* and therefore excluded from the shares.
	local code = 1
	foreach city in SF NY LA {
		gen dummy_`city' = (d_location_mode==`code') ///
			if !mi(d_location_mode) & !inlist(d_location_mode, 6, 13, 16, 17, 18, 19)
		local ++code
	}
	local col = 2
	foreach city in SF NY LA {
		sum dummy_`city', d
		mat J[2,`col'] = r(mean)*100
		local col = `col' + 2
	}

	* Top 3 industries: platform_industry codes 18, 5 and 2, labelled
	* Software, Finance and Analytics in the table.
	gen dummy_ind_tech = (platform_industry == 18) if !mi(platform_industry)
	gen dummy_ind_fin = (platform_industry == 5) if !mi(platform_industry)
	gen dummy_ind_lyt = (platform_industry == 2) if !mi(platform_industry)
	local col = 2
	foreach ind in tech fin lyt {
		sum dummy_ind_`ind', d
		mat J[3,`col'] = r(mean)*100
		local col = `col' + 2
	}
	* r(N) is still that of the last industry dummy summarized
	local no_industry : di %5.0fc r(N)

	* Sample sizes quoted in the row titles
	qui sum dummy_LA
	local no_location : di %5.0fc r(N)

	qui sum dummy_cs1000
	local no_cs : di %5.0fc r(N)

	qui sum platform_health_ins
	local no_benefits : di %5.0fc r(N)

restore

* Append the firm size, location and industry rows to the table file
frmttable using "$tables/table1.tex", append tex statmat(J) ///
varlabels  fragment sdec( 0 \ 0)  ///
rtitles("\noalign{\smallskip} \hline \noalign{\smallskip} \textbf{Firm Size (Nb. Employees)} &&\textbf{1-10}&&\textbf{11-50}&&\textbf{51-200}&&\textbf{201-500}&&\textbf{500+} \\ Share (N = `no_cs')" \ ///
"\noalign{\smallskip} \hline \noalign{\smallskip} \textbf{Top 3 Locations} && \textbf{SF} && \textbf{NY} && \textbf{LA} \\ Share (N = `no_location')" \ ///
 "\noalign{\smallskip} \hline \noalign{\smallskip} \textbf{Top 3 Industries} && \textbf{Software} && \textbf{Finance} && \textbf{Analytics} \\ Share (N = `no_industry')") bracket( "" "\%" \ "" "") substat(0)
} 
 
 
 *----------------------------------------------------------------------------*
 *		--- Table II Gender Differences in the Ask Salary --- 	              *
 *----------------------------------------------------------------------------*

{

* Table II: regressions of the log ask salary (logs_salary) on the female
* indicator with progressively richer sets of controls. Each model is stored
* and tagged (via estadd) with the adjusted R2 and with indicator strings
* ("X" or empty) for the esttab call at the end of the section.

* Load Data Set

$opendata
$dropvars
$timerestrict

preserve
sort sid batch_start_date

** collapse at the sid level (first observation per candidate after sorting)
collapse (first) female logs_salary $fcontrols $ccontrols $preferences smonthyear company_name, by(sid)

* (1) raw gap + time FE
reg logs_salary i.female i.smonthyear , r

	estimates store OLS0_long1
	estadd scalar r2_adj =  e(r2_a)
	estadd local experiencefe ""
	estadd local fieldfe ""
	estadd local locationfe ""
	estadd local educationfe ""
	estadd local preferencefe ""
	estadd local employmentfe ""
	estadd local monthfe "X"
	estadd local firmfe ""

* (2) raw gap + experience + location + field
reg logs_salary i.female i.smonthyear ///
	i.s_primary_field_exp s_total_exp s_total_exp2 i.s_choice_location ///
	i.s_current_location i.s_primary_field i.nbpastbatch i.batch_length, r

	estimates store OLS0_long2
	estadd scalar r2_adj =  e(r2_a)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local educationfe ""
	estadd local preferencefe ""
	estadd local employmentfe ""
	estadd local monthfe "X"
	estadd local firmfe ""

* (3) raw gap + experience + location + field + education
reg logs_salary i.female i.smonthyear ///
	i.s_primary_field_exp s_total_exp s_total_exp2 i.s_choice_location ///
	i.s_current_location i.s_primary_field i.nbpastbatch i.batch_length ///
	i.cat_degree i.csdegree i.ivyplus s_grad_year i.ranking, r

	estimates store OLS0_long3
	estadd scalar r2_adj =  e(r2_a)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local external "X"
	estadd local educationfe "X"
	estadd local preferencefe ""
	estadd local employmentfe ""
	estadd local monthfe "X"
	estadd local firmfe ""

* (4) raw gap + experience + location + field + education + preferences
* Each regressor is listed exactly once: the experience terms were previously
* repeated on two consecutive lines, which only adds redundant (collinear)
* entries to the specification.
reg logs_salary i.female i.smonthyear ///
	i.s_primary_field_exp s_total_exp s_total_exp2 i.s_choice_location ///
	i.s_current_location i.s_primary_field i.nbpastbatch i.batch_length ///
	i.cat_degree i.csdegree i.ivyplus s_grad_year i.ranking ///
	i.s_contract i.s_search_status i.s_sponsorship i.(${preferences}), r

	estimates store OLS0_long4
	estadd scalar r2_adj =  e(r2_a)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local external "X"
	estadd local educationfe "X"
	estadd local preferencefe "X"
	estadd local employmentfe ""
	estadd local monthfe "X"
	estadd local firmfe ""

* (5) raw gap + experience + location + field + external websites + education + preferences + employment
* (the full control set is taken from the global $controls)
reg logs_salary i.female $controls, r

	estimates store OLS0_long5
	estadd scalar r2_adj =  e(r2_a)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local external "X"
	estadd local educationfe "X"
	estadd local preferencefe "X"
	estadd local employmentfe "X"
	estadd local monthfe "X"
	estadd local firmfe ""

* (6) Most recent company FE: company_name is encoded to a numeric id and
* absorbed; the within adjusted R2 is the one reported.
encode company_name, gen(firm_name)
reghdfe logs_salary i.female $controls, absorb(firm_name) vce(robust)

	estimates store OLS0_firmFE
	estadd scalar r2_adj=  e(r2_a_within)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local external "X"
	estadd local educationfe "X"
	estadd local preferencefe "X"
	estadd local employmentfe "X"
	estadd local monthfe "X"
	estadd local firmfe "X"
restore

* (7) Regression at the bid level: observations with a non-missing d_salary,
* standard errors clustered by candidate (sid)

reg logs_salary i.female $controls if !mi(d_salary), vce(cluster sid)

	estimates store OLS0_alt
	estadd scalar r2_adj = e(r2_a)
	estadd local experiencefe "X"
	estadd local locationfe "X"
	estadd local fieldfe "X"
	estadd local external "X"
	estadd local educationfe "X"
	estadd local preferencefe "X"
	estadd local employmentfe "X"
	estadd local monthfe "X"
	estadd local firmfe ""

* Export all seven columns; formatting options come from the global $tableask
esttab OLS0_long1 OLS0_long2 OLS0_long3 OLS0_long4 OLS0_long5 OLS0_firmFE OLS0_alt ///
using "$tables/table2.tex", $tableask

}
 

  *----------------------------------------------------------------------------*
 * 		--- Table III Estimates for Controls other than Gender in Equations 2  *
 *                  and 7 and for Final Offers ---							  *
 *----------------------------------------------------------------------------* 

{

* Table III: the ask, bid and final-offer regressions with the full control
* set, exported with the options in $tablecontrols.

* Load Data Set

$opendata
$dropvars
$timerestrict

* Column 1 -- ask gap, estimated on one observation per candidate
* (first observation of each sid after sorting by batch_start_date)
preserve
	sort sid batch_start_date
	collapse (first) female logs_salary $fcontrols $ccontrols $preferences smonthyear, by(sid)

	reg logs_salary i.female $controls, r
	estimates store OLS0_1
	estadd local hascontrols "X"
	estadd local timefe "X"
restore

* Mean-center the log ask salary on the restored (uncollapsed) data
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

* Columns 2-5 -- bid gap (logd_salary), then final-offer gap (logdf_salary).
* Each outcome is estimated twice: on female alone, and with female fully
* interacted with the centered log ask. Standard errors are clustered
* two-way by job and candidate.
* Stored names: OLS1_2, OLS1_3 (bid) and OLS2_4, OLS2_5 (final offer).
local k = 0
foreach depvar in logd_salary logdf_salary {
	local ++k
	local m = 2*`k'

	ivreg2 `depvar' i.female $controls, cluster(jobid sid)
	estimates store OLS`k'_`m'
	estadd local hascontrols "X"
	estadd local timefe "X"

	local ++m
	ivreg2 `depvar' i.female##c.centered_logask $controls, cluster(jobid sid)
	estimates store OLS`k'_`m'
	estadd local hascontrols "X"
	estadd local timefe "X"
}

esttab OLS0_1 OLS1_2 OLS1_3 OLS2_4 OLS2_5 ///
using "$tables/table3.tex", $tablecontrols

}

 *----------------------------------------------------------------------------*
 * 		--- Table IV The Role of the Ask and Resume Characteristics		 	  *
 *					  in Bid Salary Gender Differences ---   				  *
 *----------------------------------------------------------------------------*
 
{
* Table IV: regressions of the log bid salary (logd_salary) on the female
* indicator, with and without the ask salary and resume controls, first
* without and then with job fixed effects.

* Load Data Set

$opendata
$dropvars
$timerestrict

* mean center the log ask salary
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

* Right-hand sides of the five specifications
local spec1 "i.female i.smonthyear"
local spec2 "i.female $controls"
local spec3 "i.female centered_logask i.smonthyear"
local spec4 "i.female centered_logask $controls"
local spec5 "i.female##c.centered_logask $controls"

* Columns 1-5 (OLS11-OLS15): two-way clustered by job and candidate.
* Specifications 2, 4 and 5 include $controls and are flagged accordingly.
forvalues m = 1/5 {
	local ctl = cond(inlist(`m', 2, 4, 5), "X", "")
	ivreg2 logd_salary `spec`m'', cluster(jobid sid)
	estimates store OLS1`m'
	estadd scalar r2_adj = e(r2_a)
	estadd local hascontrols "`ctl'"
	estadd local timefe "X"
}

* adding Job FE
* Columns 6-8 (OLS11b-OLS13b) re-estimate specifications 1, 2 and 4 with
* jobid absorbed; the within adjusted R2 is reported.
local j = 0
foreach m in 1 2 4 {
	local ++j
	local ctl = cond(`m' == 1, "", "X")
	reghdfe logd_salary `spec`m'', absorb(jobid) vce(cluster jobid sid)
	estimates store OLS1`j'b
	estadd scalar r2_adj = e(r2_a_within)
	estadd local hascontrols "`ctl'"
	estadd local timefe "X"
	estadd local jobfe "X"
}

esttab OLS11 OLS12 OLS13 OLS14 OLS15 OLS11b OLS12b OLS13b using "$tables/table4.tex", $tablebidjob

}

 



 *----------------------------------------------------------------------------*
 * 		--- Table V The Role of the Ask and Resume Characteristics	  		  *
 *					in Final Offer Gender Differences ---					  *
 *----------------------------------------------------------------------------*
 
{

* Table V: regressions of the log final-offer salary (logdf_salary) on the
* female indicator, with and without the ask salary and resume controls,
* first without and then with firm fixed effects.

* Load Data Set

$opendata
$dropvars
$timerestrict

* mean center the log ask salary
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

* Right-hand sides of the five specifications
local spec1 "i.female i.smonthyear"
local spec2 "i.female $controls"
local spec3 "i.female centered_logask i.smonthyear"
local spec4 "i.female centered_logask $controls"
local spec5 "i.female##c.centered_logask $controls"

* Columns 1-5 (OLS21-OLS25): no firm FE, two-way clustered by job and
* candidate. Specifications 2, 4 and 5 include $controls.
forvalues m = 1/5 {
	local ctl = cond(inlist(`m', 2, 4, 5), "X", "")
	ivreg2 logdf_salary `spec`m'', cluster(jobid sid)
	estimates store OLS2`m'
	estadd scalar r2_adj = e(r2_a)
	estadd local hascontrols "`ctl'"
	estadd local timefe "X"
	estadd local firmfe ""
}

* Adding firm FE
* Columns 6-8 (OLS26-OLS28) re-estimate specifications 2, 4 and 5 with
* companyid absorbed; the within adjusted R2 is reported.
local col = 5
foreach m in 2 4 5 {
	local ++col
	reghdfe logdf_salary `spec`m'' , absorb(companyid) vce(cluster jobid sid)
	estimates store OLS2`col'
	estadd scalar r2_adj = e(r2_a_within)
	estadd local hascontrols "X"
	estadd local timefe "X"
	estadd local firmfe "X"
}

esttab OLS21 OLS22 OLS23 OLS24 OLS25 OLS26 OLS27 OLS28 ///
       using "$tables/table5.tex", $tablefinal

}


 *----------------------------------------------------------------------------*
 * 		--- Table VI Gender Differences in the Number of Bids Received and    *
 *                   the Probability of Receiving a Final Offer 		  	  *
 *                   after an Interview ---  								  *
 *----------------------------------------------------------------------------*

{

* Table VI: (a) number of bids received per batch and (b) probability of a
* final offer after an interview. For each column a nonlinear model
* (poisson / logit / xtlogit) supplies the scalar "margin", and a linear
* model is what gets stored for esttab; the scalar is attached to the stored
* linear model with -estadd scalar margin-.

* Load Data Set

$opendata
$dropvars
$timerestrict

* ---------------------------------------------------------------------------
* Nb of bids received
* ---------------------------------------------------------------------------
preserve

	* rescale, square and label ask salary
	replace s_salary = s_salary / 100000
	gen s_salary2 = s_salary^2
	label var s_salary2 "\textbf{Ask salary$^2$}"

	* One row per batch: first value of the candidate characteristics, total
	* bids and final offers sent, and the mean log bid salary.
	* NOTE(review): smonthyear is used in the regressions below but is not
	* listed explicitly in this collapse; presumably it is carried by one of
	* the control globals -- confirm.
	sort batchid batch_start_date

	collapse (first) logs_salary sid female s_salary s_salary2 $ccontrols $fcontrols $preferences  ///
			 (sum) 	 d_sent df_sent (mean) logd_salary, by(batchid)

	ren d_sent nb_d_received

	* Right-hand sides, in column order:
	*   0 female + time FE only       1 + controls
	*   2 + ask salary                3 + ask salary and its square
	*   4 + ask salary and its interaction with female
	local rhs0 "i.female i.smonthyear"
	local rhs1 "i.female  $controls"
	local rhs2 "i.female s_salary $controls"
	local rhs3 "i.female s_salary s_salary2 $controls"
	local rhs4 "i.female s_salary i.female#c.s_salary $controls"

	forvalues m = 0/4 {
		local ctl = cond(`m' == 0, "", "X")

		* Poisson model; after -margins, post- the second column of e(b)
		* is read as the effect for 1.female
		poisson nb_d_received `rhs`m'', vce(cluster sid)
		margins , dydx(1.female) post
		matrix b = e(b)
		scalar margin = b[1,2]

		* Linear counterpart, stored for the table
		reg nb_d_received `rhs`m'', vce(cluster sid)
		estimates store d_sent`m'
		estadd scalar margin
		estadd local hascontrols "`ctl'"
		estadd local timefe "X"
		estadd local jobfe ""
	}

restore

* ---------------------------------------------------------------------------
* Final offers received
* ---------------------------------------------------------------------------
preserve

	* rescale, square and label ask salary
	replace s_salary = s_salary / 100000
	gen s_salary2 = s_salary^2
	label var s_salary2 "\textbf{Ask salary$^2$}"

	** data construction
	keep if d_accept == 1 /* we are focusing on jobs for which the candidate interviews */

	* Columns df_sent2 / df_sent3: logit for the marginal effect, then the
	* linear probability model with two-way clustered standard errors.
	* The logit standard errors are not reported, so one-way clustering by
	* jobid is used there.
	local frhs2 "i.female  $controls"
	local frhs3 "i.female s_salary s_salary2 $controls"

	forvalues m = 2/3 {
		logit df_sent `frhs`m'', vce(cluster jobid)
		margins , dydx(1.female)  post
		matrix b = e(b)
		scalar margin = b[1,2]

		ivreg2 df_sent `frhs`m'', cluster(jobid sid)
		estimates store df_sent`m'
		estadd scalar r2_adj = e(r2_a)
		estadd scalar margin
		estadd local hascontrols "X"
		estadd local timefe "X"
		estadd local jobfe ""
	}

	* Column df_sent4: same regressors as df_sent3, with job fixed effects.
	* NOTE(review): here "margin" is read from the xtlogit coefficient
	* vector directly (no -margins- call), unlike the columns above -- confirm
	* that this is the quantity meant to be reported.
	xtset jobid batchid
	xtlogit df_sent i.female s_salary s_salary2 $controls, fe
	matrix b = e(b)
	scalar margin = b[1,2]

	reghdfe df_sent i.female s_salary s_salary2 $controls, absorb(jobid) vce(cluster jobid sid)
	estimates store df_sent4
	estadd scalar r2_adj = e(r2_a_within)
	estadd scalar margin
	estadd local hascontrols "X"
	estadd local timefe "X"
	estadd local jobfe "X"

restore

esttab d_sent0 d_sent1 d_sent2 d_sent3 d_sent4 df_sent2 df_sent3 df_sent4 ///
using "$tables/table6.tex", $tableextent
}


 *----------------------------------------------------------------------------*
 * 		--- Table VII The Effect of the Reform on the Gender Gap ---   	      *
 *----------------------------------------------------------------------------*  
 
{

* Table VII: regressions of log ask, bid and final salaries on
* female, after and female_after, with $expcontrols.
* Side outputs: "$data/eventstudy_experience.dta" (coefficients saved with
* regsave for Figure 6) and "$tables/table7.tex".

* Load Data Set
* no longer restrict to pre-period
$opendata
$dropvars
* SF software engineers
keep if s_primary_field == 4 & s_choice_location == 1


* Ask gap (one observation per candidate)
preserve

sort sid batch_start_date
** collapse at the sid level, keeping each candidate's first row in batch_start_date order
collapse (first) female after female_after logs_salary $fcontrols $ccontrols $preferences smonthyear smonth, by(sid)

reg logs_salary i.female after i.female_after $expcontrols , r
	estimates store es_askgap_c
	* NOTE(review): this column stores hasexp but neither jobfe nor r2_adj,
	* unlike the other columns - confirm against the $tablebexp1 layout.
	estadd local hasexp ""
	estadd local hascontrols "X"
	sum logs_salary
	estadd scalar depmean = r(mean)

restore

* Weighted Ask gap: same regression on the un-collapsed data, restricted to
* rows with a non-missing d_salary
reg logs_salary i.female after i.female_after $expcontrols if !mi(d_salary), vce(cluster sid)

* We save the regression result in a dataset (for Figure 6)
* "replace" starts the file afresh; every later regsave in this block appends to it
regsave  			1.female 1.female_after	using "$data/eventstudy_experience.dta", replace ci addlabel(experience, -1,  salary ,"ws_salary")
	estimates store es_askgap_cw
	estadd local jobfe ""
	estadd local hascontrols "X"
	estadd scalar r2_adj = e(r2_a)
	sum logs_salary  if !mi(d_salary)
	estadd scalar depmean = r(mean)

* Weighted Ask gap by experience (for Figure 6)
* Loops over exp_group 0, 1, 2; results are only saved to the dataset, not to the table
forvalues i=0/2{
reg logs_salary i.female after i.female_after $expcontrols  if exp_group == `i' & !mi(d_salary), vce(cluster sid)
regsave  			1.female 1.female_after 	using "$data/eventstudy_experience.dta", append ci addlabel(experience, `i',  salary ,"ws_salary")
}

* Bid gap (two-way clustering on candidate and job)
ivreg2 logd_salary i.female after i.female_after $expcontrols , cluster(sid jobid)
regsave  		1.female 1.female_after 	using "$data/eventstudy_experience.dta", append ci addlabel(experience, -1,  salary ,"d_salary")
estimates store es_bidgap_c
	estadd local jobfe ""
	estadd local hascontrols "X"
	estadd scalar r2_adj = e(r2_a)
	sum logd_salary
	estadd scalar depmean = r(mean)

* Bid gap by experience (for Figure 6; dataset only)
forvalues i=0/2{
ivreg2 logd_salary i.female after i.female_after $expcontrols  if exp_group == `i' , cluster(sid jobid)
regsave  			1.female 1.female_after 	using "$data/eventstudy_experience.dta", append ci addlabel(experience, `i',  salary ,"d_salary")

}

* Bid gap control for ask
* The log ask is demeaned over all rows in memory before entering the regression
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'
ivreg2 logd_salary centered_logask i.female after i.female_after  $expcontrols, cluster (sid jobid)
estimates store es_bidgap_cs
	estadd local jobfe ""
	estadd local hascontrols "X"
	estadd scalar r2_adj = e(r2_a)
	sum logd_salary
	estadd scalar depmean = r(mean)

* Bid gap with job FE
reghdfe logd_salary  i.female after i.female_after  $expcontrols, absorb(jobid) vce(cluster jobid sid)
estimates store es_bidgap_fe
	* This is the only column that absorbs jobid, so the job-FE indicator is
	* switched on (it was previously left empty like the other columns).
	estadd local jobfe "X"
	estadd local hascontrols "X"
	* within R2, since the job fixed effects are absorbed
	estadd scalar r2_adj = e(r2_a_within)
	sum logd_salary if !mi(after)
	estadd scalar depmean = r(mean)

* Final gap
ivreg2 logdf_salary  i.female after i.female_after  $expcontrols, cluster (sid jobid)
estimates store es_finalgap_c
	estadd local jobfe ""
	estadd local hascontrols "X"
	estadd scalar r2_adj = e(r2_a)
	sum logdf_salary
	estadd scalar depmean = r(mean)

* Export Table VII
esttab es_askgap_c es_askgap_cw es_bidgap_c es_bidgap_cs es_bidgap_fe es_finalgap_c using "$tables/table7.tex", $tablebexp1

}
 
 
 *----------------------------------------------------------------------------*
 * 		--- Table VIII The Effect of the Reform at the Extensive Margin ---   *
 *----------------------------------------------------------------------------*
 
 
{

* Table VIII: candidate-level outcomes (number of bids, hours to first bid,
* any final offer) and bid-level firm rank, each regressed on female, after
* and female_after with $expcontrols. Output: "$tables/table8.tex".

* Load Data Set
* no longer restrict to pre-period
$opendata
$dropvars
* SF software engineers
keep if s_primary_field == 4 & s_choice_location == 1

* ---------------- Candidate-level outcomes (cols 1-3, 6-8) ----------------
preserve
* generate time to first offer per person (conditional on getting one)
* Within each batchid the earliest d_sent_date row gets the elapsed time; the
* egen mean then copies that single value to every row of the batch.
* hours() converts milliseconds to hours - assumes both dates are %tc
* datetimes; TODO confirm.
bys batchid (d_sent_date) : gen first_offer_time =hours(d_sent_date - batch_start_date)  if _n == 1
bys batchid : egen first_offer_hours = mean(first_offer_time)
drop first_offer_time

sort sid batch_start_date

** collapse at the sid level
* (first) keeps each candidate's first row in batch_start_date order, d_sent is
* summed into a bid count and df_sent becomes an "any final offer" indicator
collapse (first) female after female_after logs_salary $fcontrols $ccontrols $preferences smonthyear smonth first_offer_hours (sum) d_sent (max) df_sent, by(sid)

ren d_sent nb_d_received
ren df_sent df_received

* effect on the number of bids received by a candidate (col 1)
* The Poisson model only supplies the marginal effect stored as marginb; the
* reported coefficients come from the linear regression.
poisson nb_d_received i.female after i.female_after $expcontrols , vce(cluster sid)
margins , dydx(1.female_after)  post
matrix b = e(b)
scalar marginb = b[1,2]
reg nb_d_received i.female after i.female_after $expcontrols, vce(cluster sid)
estimates store es_nb_d
estadd scalar marginb
sum nb_d_received
estadd scalar depmean = r(mean)
estadd local hascontrols "X"

* including controls for ask and ask squared (col 6)
* NOTE(review): s_salary and s_salary2 must survive the collapse above and
* s_salary2 is not generated anywhere in this block - confirm both are
* carried in through the control globals.
reg nb_d_received i.female after i.female_after $expcontrols s_salary s_salary2, vce(cluster sid)
estimates store es_nb_ds
estadd local hascontrols "X"

* predicted change in number of bids received by a candidate (col 7)
* Fit on the pre-period (after == 0), predict for everyone, then regress the
* prediction on the reform terms
reg nb_d_received i.female $expcontrols if after == 0, vce(cluster sid)
predict es_nb_dpred, xb
reg  es_nb_dpred i.female after i.female_after  ,  vce(cluster sid)
estimate store es_nb_dpred
estadd local hascontrols ""

* prediction including controls for ask and ask squared (col 8)
reg nb_d_received i.female $expcontrols s_salary s_salary2 if after == 0, vce(cluster sid)
predict es_nb_dspred, xb
reg  es_nb_dspred i.female after i.female_after  ,  vce(cluster sid)
estimate store es_nb_dspred
estadd local hascontrols ""

* effect on arrival time of first interview request (col 2)
poisson first_offer_hours i.female after i.female_after $expcontrols , vce(cluster sid)
margins , dydx(1.female_after)  post
matrix b = e(b)
scalar marginb = b[1,2]
reg first_offer_hours i.female after i.female_after $expcontrols, vce(cluster sid)
estimates store es_time_c
estadd scalar marginb
sum first_offer_hours
estadd scalar depmean = r(mean)
estadd local hascontrols "X"

* whether there was a final offer sent to the candidate (col 3)
poisson df_received i.female after i.female_after $expcontrols , vce(cluster sid)
margins , dydx(1.female_after)  post
matrix b = e(b)
scalar marginb = b[1,2]
reg df_received i.female after i.female_after $expcontrols, vce(cluster sid)
estimates store es_nb_df
estadd scalar marginb
sum df_received
estadd scalar depmean = r(mean)
estadd local hascontrols "X"

restore

* ---------------- Firm rank on bid level (cols 4-5) ------------------------

* Build the firm ranking in a tempfile.
* This runs inside preserve/restore because "import delimited, clear" would
* otherwise wipe the bid-level data, and the merge below would then join the
* ranking file with itself, leaving no female/after covariates to regress on.
preserve
	import delimited using "$indata/unique_ranking.csv", clear
	ren cid companyid
	drop id

	* rank firms by ascending pi_1 and rescale so the largest rank equals 100
	sort pi_1
	gen rank = _n

	sum rank, d
	local max = r(max)

	gen norm_rank = (rank/`max')*100

	tempfile ranking
	save `ranking'
restore

* Merge with ranking data
* keep(master match) drops firms that appear only in the ranking file so they
* cannot enter the dependent-variable means below; nogenerate avoids a clash
* with any _merge variable already in the data.
merge m:1 companyid using `ranking', keep(master match) nogenerate

* col 4 (firm rank bid)
ivreg2 norm_rank i.female after i.female_after $expcontrols, cluster(sid jobid)
estimates store rank_1
sum norm_rank
	* estadd needs the "scalar" subcommand; without it the call errors out
	estadd scalar depmean = r(mean)
	estadd local hascontrols "X"


* col 5 (firm rank final)
* same regression restricted to bids with a non-missing logdf_salary
ivreg2 norm_rank i.female after i.female_after $expcontrols if !mi(logdf_salary), cluster(sid jobid)
estimates store rank_2
sum norm_rank if !mi(logdf_salary)
	estadd scalar depmean = r(mean)
	estadd local hascontrols "X"

* Export Table VIII
esttab es_nb_d es_time_c es_nb_df rank_1 rank_2 es_nb_ds es_nb_dpred es_nb_dspred ///
using "$tables/table8.tex", $tablebexp2
}
 

